# Make a new column which categorises the type of natural disaster as a direct effect of climate change, an indirect effect of climate change, or not related

# Disaster types treated as primary and secondary effects of climate change.
# Any type listed in neither vector falls through to "Not related" below.
primary_effects <- c("Extreme temperature", "Flood", "Storm")
secondary_effects <- c(
  "Glacial lake outburst flood", "Drought", "Wildfire",
  "Mass movement (wet)", "Mass movement (dry)"
)

# Add the climate_change_effect column.
# The labels above must match the values of `type` exactly: %in% does an exact
# string comparison, so a stray leading/trailing space silently sends the row
# to "Not related". `type` is referenced through data masking rather than
# `updated_data$type`, so the expression always uses the rows being mutated.
full_data <- updated_data %>%
  mutate(climate_change_effect = case_when(
    type %in% primary_effects ~ "Primary effect",
    type %in% secondary_effects ~ "Secondary effect",
    TRUE ~ "Not related"
  ))

full_data
## # A tibble: 9,505 × 16
##    historic id         classification group subgroup type  subtype iso   country
##    <chr>    <chr>      <chr>          <chr> <chr>    <chr> <chr>   <chr> <chr>  
##  1 No       1999-9388… nat-cli-dro-d… Natu… Climato… Drou… Drought DJI   Djibou…
##  2 No       1999-9388… nat-cli-dro-d… Natu… Climato… Drou… Drought SDN   Sudan  
##  3 No       1999-9388… nat-cli-dro-d… Natu… Climato… Drou… Drought SOM   Somalia
##  4 No       2000-0002… nat-hyd-flo-r… Natu… Hydrolo… Flood Riveri… AGO   Angola 
##  5 No       2000-0003… nat-met-ext-c… Natu… Meteoro… Extr… Cold w… BGD   Bangla…
##  6 No       2000-0008… nat-geo-vol-a… Natu… Geophys… Volc… Ash fa… GTM   Guatem…
##  7 No       2000-0009… nat-met-sto-s… Natu… Meteoro… Storm Storm … IRN   Iran (…
##  8 No       2000-0012… nat-hyd-flo-r… Natu… Hydrolo… Flood Riveri… MOZ   Mozamb…
##  9 No       2000-0017… nat-cli-wil-l… Natu… Climato… Wild… Land f… ZAF   South …
## 10 No       2000-0019… nat-hyd-flo-r… Natu… Hydrolo… Flood Riveri… BRA   Brazil 
## # ℹ 9,495 more rows
## # ℹ 7 more variables: subregion <chr>, region <chr>, location <chr>,
## #   year <dbl>, month <dbl>, day <dbl>, climate_change_effect <chr>

Data Origins

The data set comes from the Centre for Research on the Epidemiology of Disasters (CRED). This organisation records every instance of a natural disaster since 1900 within the EM-DAT database. This comprehensive open-source database compiles data from various sources: UN agencies, government agencies, research centres, humanitarian organisations, reinsurance companies and world press agencies. For a full list of sources, see the EM-DAT website. I chose to download all the information regarding natural disasters between the years 1922 and 2022, totalling 16388 disasters. After looking at the data, it was clear that the historic record before 2000 was too sparse to stand up against the quality of data recording conducted by CRED since its inception in 2000. Rather than looking at changes over a century using the historic record, I have decided to focus on non-historic entries of natural disasters which have occurred since 2000. Natural disasters between the years 2000 and 2022 total 9505.

Research Questions

Variables

To answer my research questions I am interested in where and when different natural disasters occurred. The following variables are of potential interest in asking these questions:

For further explanation of each variable, see the codebook provided by the EM-DAT database.

# Number of disasters in each subgroup
count(full_data, subgroup, name = "count")
## # A tibble: 6 × 2
##   subgroup          count
##   <chr>             <int>
## 1 Biological          910
## 2 Climatological      678
## 3 Extra-terrestrial     1
## 4 Geophysical         760
## 5 Hydrological       4275
## 6 Meteorological     2881
# Number of disasters per type within each subgroup.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping is left
# behind on the result and dplyr has no regrouping message to print.
full_data %>%
  group_by(subgroup, type) %>%
  summarise(count = n(), .groups = "drop")
## # A tibble: 14 × 3
##    subgroup          type                        count
##    <chr>             <chr>                       <int>
##  1 Biological        Animal incident                 1
##  2 Biological        Epidemic                      880
##  3 Biological        Infestation                    29
##  4 Climatological    Drought                       393
##  5 Climatological    Glacial lake outburst flood     3
##  6 Climatological    Wildfire                      282
##  7 Extra-terrestrial Impact                          1
##  8 Geophysical       Earthquake                    626
##  9 Geophysical       Mass movement (dry)            13
## 10 Geophysical       Volcanic activity             121
## 11 Hydrological      Flood                        3852
## 12 Hydrological      Mass movement (wet)           423
## 13 Meteorological    Extreme temperature           479
## 14 Meteorological    Storm                        2402
# Number of disasters in each subtype
count(full_data, subtype, name = "count")
## # A tibble: 45 × 2
##    subtype               count
##    <chr>                 <int>
##  1 Animal incident           1
##  2 Ash fall                 99
##  3 Avalanche (dry)           1
##  4 Avalanche (wet)          47
##  5 Bacterial disease       413
##  6 Blizzard/Winter storm   174
##  7 Coastal flood            41
##  8 Cold wave               219
##  9 Collision                 1
## 10 Derecho                   6
## # ℹ 35 more rows
# This variable may be too specific for the scope of my question


# Number of disasters in each region
count(full_data, region, name = "count")
## # A tibble: 5 × 2
##   region   count
##   <chr>    <int>
## 1 Africa    2032
## 2 Americas  2180
## 3 Asia      3703
## 4 Europe    1232
## 5 Oceania    358
# Number of disasters per country within each region.
# (Grouped by `country`, not `subregion`, which is what the output shows.)
# `.groups = "drop"` returns an ungrouped tibble, so no grouping is left
# behind on the result and dplyr has no regrouping message to print.
full_data %>%
  group_by(region, country) %>%
  summarise(count = n(), .groups = "drop")
## # A tibble: 218 × 3
##    region country                  count
##    <chr>  <chr>                    <int>
##  1 Africa Algeria                     49
##  2 Africa Angola                      63
##  3 Africa Benin                       31
##  4 Africa Botswana                    13
##  5 Africa Burkina Faso                36
##  6 Africa Burundi                     53
##  7 Africa Cabo Verde                   9
##  8 Africa Cameroon                    36
##  9 Africa Canary Islands               6
## 10 Africa Central African Republic    36
## # ℹ 208 more rows
# Number of disasters per year within each region.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping is left
# behind on the result and dplyr has no regrouping message to print.
full_data %>%
  group_by(year, region) %>%
  summarise(count = n(), .groups = "drop")
## # A tibble: 115 × 3
##     year region   count
##    <dbl> <chr>    <int>
##  1  2000 Africa     125
##  2  2000 Americas   101
##  3  2000 Asia       193
##  4  2000 Europe      94
##  5  2000 Oceania     12
##  6  2001 Africa     116
##  7  2001 Americas    95
##  8  2001 Asia       164
##  9  2001 Europe      52
## 10  2001 Oceania     19
## # ℹ 105 more rows
view(full_data)
# Plotting table: one row per subgroup / subtype / region / climate-change
# category, with `count` holding the number of disasters in that combination.
# `.groups = "drop"` keeps the table ungrouped so later steps start from a
# plain tibble and no regrouping message is printed.
graph_example <- full_data %>%
  group_by(subgroup, subtype, region, climate_change_effect) %>%
  summarise(count = n(), .groups = "drop")
view(graph_example)

# Disasters by region - simple.
# graph_example has several rows per region (one per subgroup, subtype and
# climate-change category). Drawing those rows directly places the bars on top
# of each other, so only the largest single count is visible. Sum to one row
# per region first so each bar shows the regional total.
# No fixed y limit: totals exceed 1000, and bars outside the limits are dropped.
graph_example %>%
  group_by(region) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  ggplot(aes(x = region, y = count)) +
  geom_col(fill = "dark green") +
  labs(title = "Number of natural disasters between 2000 - 2022",
       y = "Number of Disasters",
       x = "Region") +
  theme_classic()

# Disasters by subgroup.
# graph_example has several rows per subgroup (one per subtype, region and
# climate-change category). Sum them first; otherwise the bars overlap and
# only the largest single count is visible.
# No fixed y limit: totals exceed 1000, and bars outside the limits are dropped.
graph_example %>%
  group_by(subgroup) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  ggplot(aes(x = subgroup, y = count)) +
  geom_col(fill = "dark green") +
  labs(title = "Number of natural disasters between 2000 - 2022",
       y = "Number of Disasters",
       x = "Types of Disasters") +
  theme_classic()

# Build up the graph - split each subgroup by region.
# Sum to one row per subgroup/region pair first: the raw table has one row per
# subtype and climate-change category, which would be drawn as overlapping
# bars inside each dodged slot, showing only the largest single count.
# No fixed y limit, so that no summed bar is dropped for exceeding it.
graph_example %>%
  group_by(subgroup, region) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  ggplot(aes(x = subgroup, y = count, fill = region)) +
  geom_col(position = "dodge") +
  labs(title = "Number of natural disasters between 2000-2022",
       y = "Number of Disasters",
       x = "Types of Disasters") +
  theme_classic()

# Total natural disasters per year, stacked by region.
# `.groups = "drop"` keeps year_totals ungrouped and avoids the regrouping
# message. There is exactly one row per year/region, so stacking is correct.
year_totals <- updated_data %>%
  group_by(year, region) %>%
  summarise(count = n(), .groups = "drop")
ggplot(year_totals, aes(x = year, y = count, fill = region)) +
  geom_col() +
  labs(title = "total number of natural disasters which have occured since 2000 split by region") +
  theme_classic()

# Total number of natural disasters per region, split by climate-change
# category.
# Sum to one row per region/category pair first: the raw table has one row per
# subgroup and subtype, which would be drawn as overlapping bars inside each
# dodged slot, showing only the largest single count.
# No fixed y limit, so that no summed bar is dropped for exceeding it.
graph_example %>%
  group_by(region, climate_change_effect) %>%
  summarise(count = sum(count), .groups = "drop") %>%
  ggplot(aes(x = region, y = count, fill = climate_change_effect)) +
  geom_col(position = "dodge") +
  labs(title = "Number of natural disasters between 2000-2022",
       y = "Number of Disasters",
       x = "Region",
       fill = "climate change effect") +
  theme_classic()

# Line graph of yearly prevalence for the three climate-change categories,
# with the overall yearly total overlaid: x is year, y is number of disasters.
overall_disaster <- count(full_data, year, name = "total_disasters")

ggplot(
  full_data,
  aes(year, after_stat(count), colour = climate_change_effect)
) +
  geom_line(stat = "count") +
  geom_line(
    data = overall_disaster,
    aes(y = total_disasters, colour = "Total Disasters")
  ) +
  labs(
    title = "Prevalence of Natural Disasters from 2000 - 2022",
    x = "Year",
    y = "Prevalence",
    colour = "Climate Change Effect"
  ) +
  theme_minimal()

# Exploring further: yearly counts broken down by disaster type

ggplot(full_data, aes(year, after_stat(count), colour = type)) +
  geom_line(stat = "count") +
  labs(
    title = "Prevalence of Natural Disasters from 2000 - 2022",
    x = "Year",
    y = "Prevalence",
    colour = "type of disaster"
  ) +
  theme_minimal()

# ...and by disaster subgroup
ggplot(full_data, aes(year, after_stat(count), colour = subgroup)) +
  geom_line(stat = "count") +
  labs(
    title = "Prevalence of Natural Disasters from 2000 - 2022",
    x = "Year",
    y = "Prevalence",
    colour = "category of disaster"
  ) +
  theme_minimal()

# Yearly totals per region, used for the "Total Disasters" line in each facet.
# `.groups = "drop"` keeps the table ungrouped and avoids the regrouping
# message.
total_disasters <- full_data %>%
  group_by(year, region) %>%
  summarise(total_disasters = n(), .groups = "drop")

# One panel per region: yearly counts per climate-change category plus the
# regional total. Colours are assigned by name so they stay attached to the
# right series even if the set or order of categories changes. The unused
# `linetype` label is dropped because no layer maps linetype.
ggplot(full_data, aes(x = year, y = after_stat(count), color = climate_change_effect)) +
  geom_line(stat = "count") +
  geom_line(data = total_disasters, aes(y = total_disasters, color = "Total Disasters")) +
  labs(x = "Year", y = "Prevalence", color = "Climate Change Effect") +
  scale_color_manual(values = c(
    "Not related" = "dark green",
    "Primary effect" = "red",
    "Secondary effect" = "blue",
    "Total Disasters" = "grey"
  )) +
  ggtitle("Prevalence of Natural Disasters from 2000 - 2022 broken down per region") +
  facet_wrap(~ region, scales = "free_x", nrow = 3)

# Interactive version of the faceted plot: build the ggplot, then hand it to
# plotly via ggplotly().
# Colours are assigned by name so they stay attached to the right series; the
# unused `linetype` label is dropped because no layer maps linetype.
interactive_plot <- ggplotly(
  ggplot(full_data, aes(x = year, y = after_stat(count), color = climate_change_effect)) +
    geom_line(stat = "count") +
    geom_line(data = total_disasters, aes(y = total_disasters, color = "Total Disasters")) +
    labs(x = "Year", y = "Prevalence", color = "Climate Change Effect") +
    ggtitle("Prevalence of Natural Disasters from 2000 - 2022") +
    theme_minimal() +
    scale_color_manual(values = c(
      "Not related" = "grey",
      "Primary effect" = "red",
      "Secondary effect" = "orange",
      "Total Disasters" = "black"
    )) +
    facet_wrap(~ region, scales = "free_x", nrow = 3)
)

# Show interactive plot
interactive_plot
# Interactive plot of yearly counts per disaster subgroup, one panel per
# region, with the regional yearly total overlaid.
# Colours are assigned by name so they stay attached to the right series; the
# unused `linetype` label is dropped because no layer maps linetype.
interactive_plot2 <- ggplotly(
  ggplot(full_data, aes(x = year, y = after_stat(count), color = subgroup)) +
    geom_line(stat = "count") +
    geom_line(data = total_disasters, aes(y = total_disasters, color = "Total Disasters")) +
    labs(x = "Year", y = "Prevalence", color = "subgroup") +
    ggtitle("Prevalence of Natural Disasters from 2000 - 2022") +
    theme_minimal() +
    scale_color_manual(values = c(
      "Biological" = "grey",
      "Climatological" = "red",
      "Extra-terrestrial" = "orange",
      "Geophysical" = "black",
      "Hydrological" = "green",
      "Meteorological" = "pink",
      "Total Disasters" = "purple"
    )) +
    facet_wrap(~ region, scales = "free_x", nrow = 3)
)

interactive_plot2
# Interactive plot of yearly counts per disaster subgroup across all regions
# (no faceting), with the overall yearly total overlaid.
# The total line uses overall_disaster (one row per year). The per-region
# table total_disasters has one row per year AND region, which without facets
# puts several points on each year and draws a zigzag instead of a total.
# Colours are assigned by name so they stay attached to the right series; the
# unused `linetype` label is dropped because no layer maps linetype.
interactive_plot3 <- ggplotly(
  ggplot(full_data, aes(x = year, y = after_stat(count), color = subgroup)) +
    geom_line(stat = "count") +
    geom_line(data = overall_disaster, aes(y = total_disasters, color = "Total Disasters")) +
    labs(x = "Year", y = "Prevalence", color = "subgroup") +
    ggtitle("Prevalence of Natural Disasters from 2000 - 2022") +
    theme_minimal() +
    scale_color_manual(values = c(
      "Biological" = "grey",
      "Climatological" = "red",
      "Extra-terrestrial" = "orange",
      "Geophysical" = "black",
      "Hydrological" = "green",
      "Meteorological" = "pink",
      "Total Disasters" = "purple"
    ))
)

interactive_plot3